{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "0517d170",
   "metadata": {},
   "outputs": [],
   "source": [
    "# raw_ad_triples\n",
    "#\n",
    "# created by LuYF-Lemon-love <luyanfeng_nlp@qq.com>  on January 11, 2023\n",
    "#\n",
    "# 该脚本展示了如何提取 DRKG 中药物治疗 Alzheimer's disease 的三元组.\n",
    "#\n",
    "# 需要的包:\n",
    "#          numpy\n",
    "#          pandas\n",
    "#          time\n",
    "#\n",
    "# 需要的文件:\n",
    "#          ../data/drkg/drkg.tsv\n",
    "#\n",
    "# 输出的文件:\n",
    "#          ./drug_treat_ad.tsv\n",
    "#          ./ad_drugs.txt"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f99d216f",
   "metadata": {},
   "source": [
    "### 导入需要的库"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "0bfa8458",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import time"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e8f770cd",
   "metadata": {},
   "source": [
    "### Alzheimer's disease"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "b0d8c692",
   "metadata": {},
   "outputs": [],
   "source": [
    "# DRKG entity IDs that this notebook treats as Alzheimer's disease.\n",
    "AD_disease_list = [\n",
    "    'Disease::DOID:10652',\n",
    "    'Disease::MESH:C536599',\n",
    "    'Disease::MESH:D000544',\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "affcde79",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(AD_disease_list)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "af23142b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['Disease::DOID:10652', 'Disease::MESH:C536599', 'Disease::MESH:D000544']"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "AD_disease_list"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "81c78866",
   "metadata": {},
   "source": [
    "### Treatment relation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "e4817e49",
   "metadata": {},
   "outputs": [],
   "source": [
    "# DRKG relation names that this notebook counts as treatment edges\n",
    "# between a compound and a disease.\n",
    "treatment_list = [\n",
    "    'DRUGBANK::treats::Compound:Disease',\n",
    "    'GNBR::T::Compound:Disease',\n",
    "    'Hetionet::CtD::Compound:Disease',\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "75d8535f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['DRUGBANK::treats::Compound:Disease',\n",
       " 'GNBR::T::Compound:Disease',\n",
       " 'Hetionet::CtD::Compound:Disease']"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "treatment_list"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8a1834c3",
   "metadata": {},
   "source": [
    "### 读取知识图谱"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "7775ad67",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the tab-separated DRKG edge list and label its columns\n",
    "# h (head), r (relation) and t (tail).\n",
    "df = pd.read_csv(\n",
    "    '../data/drkg/drkg.tsv',\n",
    "    sep='\\t',\n",
    "    names=['h', 'r', 't'],\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "b897da71",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>h</th>\n",
       "      <th>r</th>\n",
       "      <th>t</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Gene::2157</td>\n",
       "      <td>bioarx::HumGenHumGen:Gene:Gene</td>\n",
       "      <td>Gene::2157</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Gene::2157</td>\n",
       "      <td>bioarx::HumGenHumGen:Gene:Gene</td>\n",
       "      <td>Gene::5264</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Gene::2157</td>\n",
       "      <td>bioarx::HumGenHumGen:Gene:Gene</td>\n",
       "      <td>Gene::2158</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Gene::2157</td>\n",
       "      <td>bioarx::HumGenHumGen:Gene:Gene</td>\n",
       "      <td>Gene::3309</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Gene::2157</td>\n",
       "      <td>bioarx::HumGenHumGen:Gene:Gene</td>\n",
       "      <td>Gene::28912</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5874256</th>\n",
       "      <td>Gene::29099</td>\n",
       "      <td>STRING::REACTION::Gene:Gene</td>\n",
       "      <td>Gene::1643</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5874257</th>\n",
       "      <td>Gene::51645</td>\n",
       "      <td>STRING::REACTION::Gene:Gene</td>\n",
       "      <td>Gene::3183</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5874258</th>\n",
       "      <td>Gene::865</td>\n",
       "      <td>STRING::CATALYSIS::Gene:Gene</td>\n",
       "      <td>Gene::983</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5874259</th>\n",
       "      <td>Gene::1066</td>\n",
       "      <td>STRING::BINDING::Gene:Gene</td>\n",
       "      <td>Gene::7365</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5874260</th>\n",
       "      <td>Gene::6118</td>\n",
       "      <td>STRING::BINDING::Gene:Gene</td>\n",
       "      <td>Gene::1111</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5874261 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                   h                               r            t\n",
       "0         Gene::2157  bioarx::HumGenHumGen:Gene:Gene   Gene::2157\n",
       "1         Gene::2157  bioarx::HumGenHumGen:Gene:Gene   Gene::5264\n",
       "2         Gene::2157  bioarx::HumGenHumGen:Gene:Gene   Gene::2158\n",
       "3         Gene::2157  bioarx::HumGenHumGen:Gene:Gene   Gene::3309\n",
       "4         Gene::2157  bioarx::HumGenHumGen:Gene:Gene  Gene::28912\n",
       "...              ...                             ...          ...\n",
       "5874256  Gene::29099     STRING::REACTION::Gene:Gene   Gene::1643\n",
       "5874257  Gene::51645     STRING::REACTION::Gene:Gene   Gene::3183\n",
       "5874258    Gene::865    STRING::CATALYSIS::Gene:Gene    Gene::983\n",
       "5874259   Gene::1066      STRING::BINDING::Gene:Gene   Gene::7365\n",
       "5874260   Gene::6118      STRING::BINDING::Gene:Gene   Gene::1111\n",
       "\n",
       "[5874261 rows x 3 columns]"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d8fabe1c",
   "metadata": {},
   "source": [
    "### 提取药物治疗 AD 的三元组"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "13cad366",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "运行时间为 241 seconds\n"
     ]
    }
   ],
   "source": [
    "# Collect every DRKG triple whose head ID starts with 'Compound::DB', whose\n",
    "# relation is in treatment_list and whose tail is in AD_disease_list.\n",
    "#\n",
    "# Boolean masks over whole columns replace the former df.iterrows() loop,\n",
    "# whose recorded run over the 5,874,261-row frame took 241 seconds.\n",
    "start = time.perf_counter()\n",
    "\n",
    "# na=False: a missing head entity simply does not match.\n",
    "is_db_compound = df['h'].str.startswith('Compound::DB', na=False)\n",
    "is_treatment = df['r'].isin(treatment_list)\n",
    "targets_ad = df['t'].isin(AD_disease_list)\n",
    "selected = df.loc[is_db_compound & is_treatment & targets_ad, ['h', 'r', 't']]\n",
    "\n",
    "# Keep triples as a list of [h, r, t] lists in row order, the shape the\n",
    "# later cells index as triple[0], triple[1], triple[2].\n",
    "triples = selected.values.tolist()\n",
    "\n",
    "end = time.perf_counter()\n",
    "\n",
    "print(\"运行时间为\", round(end-start), 'seconds')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "07634e2a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Compound::DB00656\tDRUGBANK::treats::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB00674\tDRUGBANK::treats::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB00843\tDRUGBANK::treats::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB01043\tDRUGBANK::treats::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB01403\tDRUGBANK::treats::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB09148\tDRUGBANK::treats::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB09149\tDRUGBANK::treats::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB09151\tDRUGBANK::treats::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB03929\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB01750\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB02543\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB12116\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB00244\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB03575\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB00325\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB00313\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB00641\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB06527\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB03467\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB11780\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB11725\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB04743\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB11748\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB03614\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB12148\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB13132\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB06479\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB06750\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB06712\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB15579\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB13668\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB04864\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB12052\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB01381\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB15357\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB04892\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB00533\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB00337\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB05289\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB11805\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB08842\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB03128\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB11100\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB01370\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB04365\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB00126\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB00945\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB00928\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB00993\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB04115\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB06756\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB01200\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB00490\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB00136\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB00564\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB09157\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB14086\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB00122\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB01394\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB09130\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB11672\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB00987\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB01219\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB00694\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB00746\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB01234\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB00472\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB00712\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB00158\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB03843\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB02530\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB00674\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB04660\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB04703\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB05381\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB04422\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB00741\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB01611\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB01050\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB00328\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB13178\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB04815\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB11336\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB01235\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB01065\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB01043\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB00331\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB00134\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB00422\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB01017\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB14128\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB13588\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB02701\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB00393\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB00368\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB00859\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB00981\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB14500\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB00635\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB00571\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB13084\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB01698\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB01037\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB00382\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB01041\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB12310\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB11473\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB00809\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB00115\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB11094\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB00682\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB01392\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB01593\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB00215\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB00091\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB04557\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB00459\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB01796\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB00207\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB00734\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB11797\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB09163\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB01645\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB03994\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB00877\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB01104\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB11068\tGNBR::T::Compound:Disease\tDisease::MESH:D000544\n",
      "Compound::DB00989\tHetionet::CtD::Compound:Disease\tDisease::DOID:10652\n",
      "Compound::DB00674\tHetionet::CtD::Compound:Disease\tDisease::DOID:10652\n",
      "Compound::DB01043\tHetionet::CtD::Compound:Disease\tDisease::DOID:10652\n",
      "Compound::DB00843\tHetionet::CtD::Compound:Disease\tDisease::DOID:10652\n"
     ]
    }
   ],
   "source": [
    "# Show each extracted triple as a tab-separated line.\n",
    "for head, relation, tail in triples:\n",
    "    print(f\"{head}\\t{relation}\\t{tail}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "702c0bf7",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "131"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(triples)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "cec5875c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the extracted triples to a TSV file, one head<TAB>relation<TAB>tail\n",
    "# record per line. The with-block closes the file even if a write fails.\n",
    "\n",
    "with open('./drug_treat_ad.tsv', 'w', encoding='utf-8') as f:\n",
    "    for triple in triples:\n",
    "        f.write('%s\\t%s\\t%s\\n' % (triple[0], triple[1], triple[2]))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "afce7c03",
   "metadata": {},
   "source": [
    "### 提取所有治疗药物"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "a5c1dd08",
   "metadata": {},
   "outputs": [],
   "source": [
    "# De-duplicate the drugs: gather the head entity of every triple into a set.\n",
    "ad_drugs = {triple[0] for triple in triples}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "a3bf98ee",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'Compound::DB00091',\n",
       " 'Compound::DB00115',\n",
       " 'Compound::DB00122',\n",
       " 'Compound::DB00126',\n",
       " 'Compound::DB00134',\n",
       " 'Compound::DB00136',\n",
       " 'Compound::DB00158',\n",
       " 'Compound::DB00207',\n",
       " 'Compound::DB00215',\n",
       " 'Compound::DB00244',\n",
       " 'Compound::DB00313',\n",
       " 'Compound::DB00325',\n",
       " 'Compound::DB00328',\n",
       " 'Compound::DB00331',\n",
       " 'Compound::DB00337',\n",
       " 'Compound::DB00368',\n",
       " 'Compound::DB00382',\n",
       " 'Compound::DB00393',\n",
       " 'Compound::DB00422',\n",
       " 'Compound::DB00459',\n",
       " 'Compound::DB00472',\n",
       " 'Compound::DB00490',\n",
       " 'Compound::DB00533',\n",
       " 'Compound::DB00564',\n",
       " 'Compound::DB00571',\n",
       " 'Compound::DB00635',\n",
       " 'Compound::DB00641',\n",
       " 'Compound::DB00656',\n",
       " 'Compound::DB00674',\n",
       " 'Compound::DB00682',\n",
       " 'Compound::DB00694',\n",
       " 'Compound::DB00712',\n",
       " 'Compound::DB00734',\n",
       " 'Compound::DB00741',\n",
       " 'Compound::DB00746',\n",
       " 'Compound::DB00809',\n",
       " 'Compound::DB00843',\n",
       " 'Compound::DB00859',\n",
       " 'Compound::DB00877',\n",
       " 'Compound::DB00928',\n",
       " 'Compound::DB00945',\n",
       " 'Compound::DB00981',\n",
       " 'Compound::DB00987',\n",
       " 'Compound::DB00989',\n",
       " 'Compound::DB00993',\n",
       " 'Compound::DB01017',\n",
       " 'Compound::DB01037',\n",
       " 'Compound::DB01041',\n",
       " 'Compound::DB01043',\n",
       " 'Compound::DB01050',\n",
       " 'Compound::DB01065',\n",
       " 'Compound::DB01104',\n",
       " 'Compound::DB01200',\n",
       " 'Compound::DB01219',\n",
       " 'Compound::DB01234',\n",
       " 'Compound::DB01235',\n",
       " 'Compound::DB01370',\n",
       " 'Compound::DB01381',\n",
       " 'Compound::DB01392',\n",
       " 'Compound::DB01394',\n",
       " 'Compound::DB01403',\n",
       " 'Compound::DB01593',\n",
       " 'Compound::DB01611',\n",
       " 'Compound::DB01645',\n",
       " 'Compound::DB01698',\n",
       " 'Compound::DB01750',\n",
       " 'Compound::DB01796',\n",
       " 'Compound::DB02530',\n",
       " 'Compound::DB02543',\n",
       " 'Compound::DB02701',\n",
       " 'Compound::DB03128',\n",
       " 'Compound::DB03467',\n",
       " 'Compound::DB03575',\n",
       " 'Compound::DB03614',\n",
       " 'Compound::DB03843',\n",
       " 'Compound::DB03929',\n",
       " 'Compound::DB03994',\n",
       " 'Compound::DB04115',\n",
       " 'Compound::DB04365',\n",
       " 'Compound::DB04422',\n",
       " 'Compound::DB04557',\n",
       " 'Compound::DB04660',\n",
       " 'Compound::DB04703',\n",
       " 'Compound::DB04743',\n",
       " 'Compound::DB04815',\n",
       " 'Compound::DB04864',\n",
       " 'Compound::DB04892',\n",
       " 'Compound::DB05289',\n",
       " 'Compound::DB05381',\n",
       " 'Compound::DB06479',\n",
       " 'Compound::DB06527',\n",
       " 'Compound::DB06712',\n",
       " 'Compound::DB06750',\n",
       " 'Compound::DB06756',\n",
       " 'Compound::DB08842',\n",
       " 'Compound::DB09130',\n",
       " 'Compound::DB09148',\n",
       " 'Compound::DB09149',\n",
       " 'Compound::DB09151',\n",
       " 'Compound::DB09157',\n",
       " 'Compound::DB09163',\n",
       " 'Compound::DB11068',\n",
       " 'Compound::DB11094',\n",
       " 'Compound::DB11100',\n",
       " 'Compound::DB11336',\n",
       " 'Compound::DB11473',\n",
       " 'Compound::DB11672',\n",
       " 'Compound::DB11725',\n",
       " 'Compound::DB11748',\n",
       " 'Compound::DB11780',\n",
       " 'Compound::DB11797',\n",
       " 'Compound::DB11805',\n",
       " 'Compound::DB12052',\n",
       " 'Compound::DB12116',\n",
       " 'Compound::DB12148',\n",
       " 'Compound::DB12310',\n",
       " 'Compound::DB13084',\n",
       " 'Compound::DB13132',\n",
       " 'Compound::DB13178',\n",
       " 'Compound::DB13588',\n",
       " 'Compound::DB13668',\n",
       " 'Compound::DB14086',\n",
       " 'Compound::DB14128',\n",
       " 'Compound::DB14500',\n",
       " 'Compound::DB15357',\n",
       " 'Compound::DB15579'}"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ad_drugs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "8ca2eba5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "126"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(ad_drugs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "f063127c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the de-duplicated drug IDs to a text file, one per line.\n",
    "# A set has no stable iteration order, so sort the IDs to make the file\n",
    "# identical from run to run. The with-block closes the file even if a\n",
    "# write fails.\n",
    "\n",
    "with open('./ad_drugs.txt', 'w', encoding='utf-8') as f:\n",
    "    for drug in sorted(ad_drugs):\n",
    "        f.write('%s\\n' % (drug))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
